# load packages
library(readtext)
library(tidyverse)
library(quanteda)

# Load the human rights reports from the serialized R object on disk.
reports <- readRDS(file = "reports.RDS")

# annual number of reports (fig 1)
# Bar chart of report counts per `year`, with one dodged bar per
# `organization` (geom_bar counts rows, so no y aesthetic is needed).
fig1 <- reports %>%
  ggplot() +
  geom_bar(aes(x = year, fill = organization), position = "dodge") +
  labs(x = "Year", y = "Number of Reports", fill = "Organization") +
  scale_fill_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom")

# Evaluate the plot at top level so it is still displayed exactly as before,
# even though it is now bound to a name.
fig1

# Make sure the output directory exists: ggsave() is not guaranteed to create
# a missing directory (ggplot2 >= 3.5.0 refuses by default when run
# non-interactively), so a fresh checkout would otherwise fail here.
dir.create("Figures", showWarnings = FALSE, recursive = TRUE)

# Pass the plot explicitly instead of relying on last_plot(), so the saved
# figure cannot silently change if another plot is created in between.
ggsave("Figures/fig1.jpg", plot = fig1, height = 3.5, width = 7)

# Turn the reports data frame into a quanteda corpus; the document text is
# read from the `text` column.
corp <- corpus(x = reports, text_field = "text")

# Tokenize every document in the corpus, dropping punctuation.
toks <- tokens(x = corp, remove_punct = TRUE)

# Write the tokens object to disk.
saveRDS(object = toks, file = "tokens.RDS")